# THIS CODES RUNS THE SIMULATIONS FOR THE PAPER
# "Where's the Bias", by Cesar Zucco Jr.
# Simpler version of the simulation, that relies on a normal (instead of a beta) distribution of votes shares across districts
#
# It simulates the seat shares given the vote share at three levels of variance of vote
# shares across districts (0, 10, 20). For each of these variance levels the aggregate 
# vote share is allowed to varry from 10 to 90% with 1% intervals. For each aggregate vote
# share it draws 300 sets of 60 district vote shares from a normal distribution with each of
# sd levels and mean = to the aggregate vote share.
# Bottomline: As variance in voteshare across districts increases, the system becomes more proportional!!

par(mfrow=c(3,1),mar=c(4,4,1,1),ask=FALSE) #mar(bottom, left, top, right)
sd = 0                                  #Sets the initial variance level (no variance)
while (sd<=0.20){                       #Determines that the procedure will be repeated 
                                        #while variance < 0.2, which amounts to 3 times
trials <- 300
seat.dist <- matrix(nrow=1,ncol=5)      #creates the matrix that will store the results
m = 0.10                                #establishes the lower bound for the vote share
while (m < 0.90) {                      #establishes the upper bound for the vote share
total.seats<-vector(length=0)
for(i in 1:trials){
v.shares <- rnorm(60,  mean=m, sd=sd)   #draws 60 vote shares from the same normal distribution
s0 <- v.shares < 1/3                    #creates three vectors (60x1)with seats in each district
s1 <- v.shares >= 1/3 & v.shares <= 2/3 #for all districts, one of the three s0,s1,s2 will be 1 
s2 <- v.shares > 2/3                    #and the other two 0, indicating whether 0, 1 or 2 were won
ss <- cbind(s0,s1,s2)                   #concatenates the three vectors into 1 matrix [60x3]
seats <- ss %*% c(0,1,2)                #multiply the matrix by a [3x1]vector 0,1,2. 
                                        #The result is a vector [60x1] of seats won 
total.seats<-rbind(total.seats
    ,sum(seats))                        #adds the total number of seats and appends it to 
}                                       #bottom of vector "total seats", trials times
sorted.seats<-sort(total.seats)         #sorts the total number of seats
lower.ci <- sorted.seats[(trials*0.025)] #identifies the 5% conf. interval bounds
upper.ci <- sorted.seats[(trials*0.975)]
seats.m <- matrix(c(m,sd,mean(total.seats),lower.ci,upper.ci),ncol=5) 
                                        #saves seat share, sd, number of seats, and bounds into
                                        #a [1,5] vector called seats.m
seat.dist <- rbind(seat.dist,seats.m)   #appends seat.m on the bottom of seat.dist
m = m + 0.01                           #raises m (vote share) for the next simulation
}

plot(seat.dist[,1],seat.dist[,3]/120
    ,pch = 20
    ,ylim = range(0,1)
    ,xlim = range(0,1)
    ,ylab = "Seat Share"
    ,xlab = "Average vote share across districts"
    )
    abline(0,1)
    text(0,1, label=paste("St. Deviation =",sd), pos=4, lwd=3)
#   lines(seat.dist[,1],seat.dist[,4]/120
#        ,lty=3)
#   lines(seat.dist[,1],seat.dist[,5]/120
#        ,lty=3)
rm(m,s0,s1,s2,ss,seats,total.seats,sorted.seats,lower.ci,upper.ci,seats.m)
sd <- sd + 0.1                          #Adds 0.1 to the variance for the next round
}
